#################################################################################
### Title: Are People Willing to Trade Away Democracy for Desirable Outcomes? ###
### Authors: Jonathan A. Chu, Scott Williamson, Eddy S. F. Yeung              ###
### Content: Data cleaning for the Indian sample                              ###
### Date: September 20, 2025                                                  ###
#################################################################################

### Set-up ----
## Clean the working environment and set the working directory
# Remove every object in the current workspace so the script starts from a
# clean slate (loaded packages are not affected)
rm(list = ls())
# The relative paths used in this script ("raw_data_IN.csv" for input and
# "df_IN.csv" for output) are resolved against this directory
setwd("~/Desktop/democracy_tradeoff/replication") # change to your own working directory

## Load the required packages
# tidyverse supplies the pipe and the dplyr verbs (mutate, case_when, rename)
# used throughout the script
library(tidyverse)
library(estimatr)
library(cregg)
library(expss)
library(cjdata)

## Read the dataset
# Load the raw survey file into `df`, the respondent-level data frame that all
# later recoding steps modify
# NOTE(review): read_Qualtrics() is presumably provided by cjdata -- confirm
df <- read_Qualtrics("raw_data_IN.csv")

## Harmonise the conjoint outcome columns under common names
# Forced-choice DV: for each task (1-3), keep the first non-missing answer
# among the `DV_tradeoff`, `Q49`, and `Q53` columns, checked in that order;
# the result is NA only when all three are missing
df <- df %>%
  mutate(
    Q1.1 = coalesce(`1_DV_tradeoff`, `1_Q49`, `1_Q53`),
    Q1.4 = coalesce(`2_DV_tradeoff`, `2_Q49`, `2_Q53`),
    Q1.7 = coalesce(`3_DV_tradeoff`, `3_Q49`, `3_Q53`)
  )

# Print the distribution of each combined outcome as a quick sanity check
table(df$Q1.1)
table(df$Q1.4)
table(df$Q1.7)

# Rating for Country A: for each task (1-3), keep the first non-missing value
# among the `tradeoff_a`, `Q50`, and `Q54` columns, checked in that order
df <- df %>%
  mutate(
    Q1.2 = coalesce(`1_tradeoff_a`, `1_Q50`, `1_Q54`),
    Q1.5 = coalesce(`2_tradeoff_a`, `2_Q50`, `2_Q54`),
    Q1.8 = coalesce(`3_tradeoff_a`, `3_Q50`, `3_Q54`)
  )

# Print the distribution of each combined rating as a quick sanity check
table(df$Q1.2)
table(df$Q1.5)
table(df$Q1.8)

# Rating for Country B: for each task (1-3), keep the first non-missing value
# among the `tradeoff_b`, `Q51`, and `Q55` columns, checked in that order
df <- df %>%
  mutate(
    Q1.3 = coalesce(`1_tradeoff_b`, `1_Q51`, `1_Q55`),
    Q1.6 = coalesce(`2_tradeoff_b`, `2_Q51`, `2_Q55`),
    Q1.9 = coalesce(`3_tradeoff_b`, `3_Q51`, `3_Q55`)
  )

# Print the distribution of each combined rating as a quick sanity check
table(df$Q1.3)
table(df$Q1.6)
table(df$Q1.9)

# Translate the forced-choice answers into English: the Hindi label for
# Country A becomes "Country A", any other non-missing answer becomes
# "Country B", and a missing answer stays NA
df <- df %>%
  mutate(
    Q1.1 = ifelse(Q1.1 == "देश ए", "Country A", "Country B"),
    Q1.4 = ifelse(Q1.4 == "देश ए", "Country A", "Country B"),
    Q1.7 = ifelse(Q1.7 == "देश ए", "Country A", "Country B")
  )

## Add attribute names
# Column `F-<task>-<k>` holds the English name of the k-th attribute; the same
# constant is written to every row for each of the three tasks
df <- df %>%
  mutate(
    `F-3-1` = "Leader Selection", `F-2-1` = "Leader Selection", `F-1-1` = "Leader Selection",
    `F-3-2` = "Civil Liberties", `F-2-2` = "Civil Liberties", `F-1-2` = "Civil Liberties",
    `F-3-3` = "Leader Constraints", `F-2-3` = "Leader Constraints", `F-1-3` = "Leader Constraints",
    `F-3-4` = "Corruption in Politics", `F-2-4` = "Corruption in Politics", `F-1-4` = "Corruption in Politics",
    `F-3-5` = "National Economy", `F-2-5` = "National Economy", `F-1-5` = "National Economy",
    `F-3-6` = "Respondent Wealth", `F-2-6` = "Respondent Wealth", `F-1-6` = "Respondent Wealth",
    `F-3-7` = "Public Safety", `F-2-7` = "Public Safety", `F-1-7` = "Public Safety",
    `F-3-8` = "Health Care", `F-2-8` = "Health Care", `F-1-8` = "Health Care",
    `F-3-9` = "Minority Treatment", `F-2-9` = "Minority Treatment", `F-1-9` = "Minority Treatment",
    `F-3-10` = "Respondent Identity", `F-2-10` = "Respondent Identity", `F-1-10` = "Respondent Identity"
  )

## Recode variables of respondent characteristics
# Age (1 = younger than 40; 0 = 40 or older)
# The two conditions together cover every non-missing age, so a respondent
# aged exactly 40 falls into the "Older" group instead of being dropped to NA;
# a missing age stays NA
df <- df %>% mutate(age_bin = case_when(
  age < 40 ~ 1,
  age >= 40 ~ 0
))
# Attach labels: code 0 -> "Older", code 1 -> "Younger Than 40"
df$age_bin <- factor(df$age_bin, 0:1, c("Older", "Younger Than 40"))
# Print the distribution as a quick sanity check
table(df$age_bin)

# Respondent identity (1 = minority; 0 = otherwise)
# Only the three listed answers are coded; any other value, including a
# missing one, becomes NA
df <- df %>%
  mutate(
    minority_bin = case_when(
      minority == "हाँ" ~ 1,
      minority %in% c("नहीं", "निश्चित नहीं") ~ 0
    ),
    minority_bin = factor(
      minority_bin,
      levels = 0:1,
      labels = c("Non-Minority", "Minority")
    )
  )
table(df$minority_bin)

# Gender (1 = female; 0 = male)
# The raw column is renamed to `gender5` first; answers other than the two
# listed below are coded NA
df <- df %>%
  rename(gender5 = gender) %>%
  mutate(
    gender_bin = case_when(
      gender5 == "पुरुष" ~ 0,
      gender5 == "महिला" ~ 1
    ),
    gender_bin = factor(
      gender_bin,
      levels = 0:1,
      labels = c("Male", "Female")
    )
  )
table(df$gender_bin)

# Self-reported political ideology (1 = right; 0 = left; NA = neither)
# Three answers count as left and three as right; every other answer is left
# uncoded (NA)
df <- df %>%
  mutate(
    ideo_bin = case_when(
      political %in% c(
        "पूर्ण रूप से लेफ्ट",
        "मध्यम रूप से लेफ्ट",
        "थोड़े से लेफ्ट"
      ) ~ 0,
      political %in% c(
        "पूर्ण रूप से राइट",
        "कुछ हद तक राइट",
        "मध्यम रूप से राइट"
      ) ~ 1
    ),
    ideo_bin = factor(
      ideo_bin,
      levels = 0:1,
      labels = c("Leftwing", "Rightwing")
    )
  )
table(df$ideo_bin)

# Education (1 = college; 0 = no college)
# The first four answer options are coded as no college and the last two as
# college; any other value becomes NA
df <- df %>%
  mutate(
    edu_bin = case_when(
      edu %in% c(
        "हाई स्कूल से कम",
        "हाई स्कूल के समकक्ष",
        "असोशीएट डिग्री (AA)",
        "कोई कॉलेज, कोई डिग्री नहीं"
      ) ~ 0,
      edu %in% c(
        "स्नातक की डिग्री/चार-साल की डिग्री (BB, BS, BBA)",
        "एडवांस्ड डिग्री (MA, MS, MBA, PHD, JD, MD, आदि)"
      ) ~ 1
    ),
    edu_bin = factor(
      edu_bin,
      levels = 0:1,
      labels = c("No College", "College")
    )
  )
table(df$edu_bin)

# Socioeconomic status (1 = high; 0 = low)
# Ladder steps 1-5 are coded low and steps 6-10 high; the end points carry a
# text suffix in the raw data, so they are matched by their full labels
df <- df %>%
  mutate(
    SES = case_when(
      ladder %in% c("1 (सबसे कम)", "2", "3", "4", "5") ~ 0,
      ladder %in% c("6", "7", "8", "9", "10 (उच्चतम)") ~ 1
    ),
    SES = factor(
      SES,
      levels = 0:1,
      labels = c("Low SES", "High SES")
    )
  )
table(df$SES)

## Reshape the dataset into wide format
# Build the conjoint data set `df_cj` from the respondent-level data, passing
# the three forced-choice outcomes Q1.1, Q1.4, and Q1.7 (one per task).
# Later code relies on `df_cj` having an `id` column and one column per
# attribute name (e.g. `Leader Selection`).
# NOTE(review): the header says "wide", but conjoint reshaping usually stacks
# tasks/profiles into rows -- confirm the intended wording.
# NOTE(review): the id variable is passed here as the bare name `ResponseID`,
# while the rename further down uses `ResponseId`; confirm which spelling
# reshape_conjoint() expects and whether it wants a quoted string.
df_cj <- reshape_conjoint(df, ResponseID, paste0("Q1.", seq(from = 1, to = 9, by = 3)))

## Reorder the factors
# Each attribute is converted to a factor whose levels follow the order listed
# in `levels` and are relabelled in English; any value not listed becomes NA

# Leader selection
df_cj[["Leader Selection"]] <- df_cj[["Leader Selection"]] %>%
  factor(
    levels = c(
      "सैन्य तख्तापलट",
      "वंशानुगत उत्तराधिकार",
      "अनिर्वाचित अभिजात वर्ग का एक छोटा समूह",
      "अनुचित चुनाव",
      "स्वतंत्र और निष्पक्ष चुनाव"
    ),
    labels = c(
      "Military coup",
      "Hereditary succession",
      "Unelected elites",
      "Unfair elections",
      "Free and fair elections"
    )
  )

# Civil liberties
df_cj[["Civil Liberties"]] <- df_cj[["Civil Liberties"]] %>%
  factor(
    levels = c(
      "बिना गंभीर सरकारी दमन के खुद को अभिव्यक्त और व्यवस्थित नहीं कर सकते",
      "स्वयं को अभिव्यक्त और संगठित कर सकते हैं लेकिन सरकारी दमन के कुछ जोखिम का सामना कर सकते हैं",
      "खुद को अभिव्यक्त कर सकते हैं और स्वतंत्र रूप से संगठित हो सकते हैं"
    ),
    labels = c("Repressed", "Partially free", "Free")
  )

# Leader constraints
df_cj[["Leader Constraints"]] <- df_cj[["Leader Constraints"]] %>%
  factor(
    levels = c(
      "लगभग हमेशा विधायिका और अदालतों के अधिकार को दरकिनार कर सकते हैं",
      "कभी-कभी विधायिका और अदालतों के अधिकार को दरकिनार कर सकते हैं",
      "विधायिका और अदालतों के अधिकार का सम्मान करना चाहिए"
    ),
    labels = c("Unconstrained", "Partially constrained", "Constrained")
  )

# Corruption in politics
df_cj[["Corruption in Politics"]] <- df_cj[["Corruption in Politics"]] %>%
  factor(
    levels = c("हर समय", "कभी-कभी", "बहुत कम"),
    labels = c("Prevalent", "Occasional", "Rare")
  )

# National economy
df_cj[["National Economy"]] <- df_cj[["National Economy"]] %>%
  factor(
    levels = c("कम आय", "मध्यम आय", "उच्च आय"),
    labels = c("Low income", "Middle income", "High income")
  )

# Respondent wealth
# NOTE(review): the two string literals in this ifelse() render identically;
# presumably they differ in Unicode encoding (e.g. precomposed vs. combining
# nukta) and the line normalises the value to the spelling used in `levels`
# below -- confirm, and do not retype these literals by hand
df_cj$`Respondent Wealth` <- ifelse(df_cj$`Respondent Wealth` == "अधिकतम से ज़्यादा", "अधिकतम से ज़्यादा", df_cj$`Respondent Wealth`)
# Convert to a factor with levels in the order Poor, Average, Wealthy; values
# not listed in `levels` become NA
df_cj$`Respondent Wealth` <- 
  factor(df_cj$`Respondent Wealth`,
         levels = c("अधिकतम से कम", "औसत के बराबर", "अधिकतम से ज़्यादा"),
         labels = c("Poor", "Average", "Wealthy"))

# Each attribute below is converted to a factor whose levels follow the order
# listed in `levels` and are relabelled in English; unlisted values become NA

# Health care
df_cj[["Health Care"]] <- df_cj[["Health Care"]] %>%
  factor(
    levels = c(
      "केवल पैसे या कनेक्शन वाले लोगों के लिए सुलभ",
      "ज्यादातर लोगों के लिए सुलभ"
    ),
    labels = c("For the privileged", "Mostly accessible")
  )

# Public safety
df_cj[["Public Safety"]] <- df_cj[["Public Safety"]] %>%
  factor(
    levels = c(
      "बहुत खतरनाक",
      "थोड़ा खतरनाक",
      "कुछ हद तक सुरक्षित",
      "बहुत सुरक्षित"
    ),
    labels = c(
      "Very dangerous",
      "Somewhat dangerous",
      "Somewhat safe",
      "Very safe"
    )
  )

# Minority treatment
df_cj[["Minority Treatment"]] <- df_cj[["Minority Treatment"]] %>%
  factor(
    levels = c(
      "अधिकांश लोगों द्वारा गलत व्यवहार किया गया",
      "कुछ लोगों द्वारा उचित व्यवहार किया गया लेकिन दूसरों द्वारा गलत तरीके से",
      "ज्यादातर लोगों द्वारा उचित व्यवहार किया गया"
    ),
    labels = c("Mostly unfair", "Sometimes unfair", "Fairly treated")
  )

# Respondent identity
df_cj[["Respondent Identity"]] <- df_cj[["Respondent Identity"]] %>%
  factor(
    levels = c(
      "आपको सबसे छोटे अल्पसंख्यक समूह में रखेगी",
      "आपको दूसरे सबसे बड़े समूह में रखेगी",
      "आपको सबसे बड़े बहुसंख्यक समूह में रखेगी"
    ),
    labels = c("Minority", "Second largest", "Majority")
  )

### Save the cleaned dataset ----
## Attach the respondent-level covariates
# Rename the respondent identifier so that it matches the `id` key in `df_cj`
df <- rename(df, id = ResponseId)
# merge() with its defaults keeps only the ids present in both data sets
df_cj <- df_cj %>% merge(df, by = "id")

## Tag every row with the country code
df_cj[["country"]] <- "IN"

## Write the result to the working directory, without row names
write.csv(df_cj, file = "df_IN.csv", row.names = FALSE)
